#!pip install pandasql
from pandasql import sqldf
pysqldf = lambda q: sqldf(q, globals())
import pandas as pd
import numpy as np
scans = pd.read_excel('LAC Data Challenge Fall 2022 - Data.xlsx', sheet_name='Ticket Scan Data')
q1 = """SELECT event_name, strftime('%H:%M',event_datetime) AS eve_time FROM scans;"""
sqldf("SELECT event_name, IIF(strftime('%H:%M',event_datetime) < strftime('%H:%M','18:00'),'afternoon','evening') AS game_time FROM scans;")
| event_name | game_time | |
|---|---|---|
| 0 | LA Clippers 2021-22 Game 2 - Portland 10/25/21 | evening |
| 1 | LA Clippers 2021-22 Game 2 - Portland 10/25/21 | evening |
| 2 | LA Clippers 2021-22 Game 2 - Portland 10/25/21 | evening |
| 3 | LA Clippers 2021-22 Game 2 - Portland 10/25/21 | evening |
| 4 | LA Clippers 2021-22 Game 2 - Portland 10/25/21 | evening |
| ... | ... | ... |
| 236465 | LA Clippers 2021-22 Game 41 - Oklahoma City 4/... | evening |
| 236466 | LA Clippers 2021-22 Game 41 - Oklahoma City 4/... | evening |
| 236467 | LA Clippers 2021-22 Game 41 - Oklahoma City 4/... | evening |
| 236468 | LA Clippers 2021-22 Game 41 - Oklahoma City 4/... | evening |
| 236469 | LA Clippers 2021-22 Game 33 - New York 3/6/22 | evening |
236470 rows × 2 columns
#This SQL code extract the average attendence by evening(after 18:00) and afternoon(before 18:00)
attendance_by_time = """
SELECT
game_time, CAST(COUNT(game_time)/COUNT(DISTINCT event_name) AS int) AS average_attendence
FROM
(SELECT event_name, IIF(strftime('%H:%M',event_datetime) < strftime('%H:%M','18:00'),'afternoon','evening') AS game_time FROM scans)
GROUP BY game_time;"""
pysqldf(attendance_by_time)
| game_time | average_attendence | |
|---|---|---|
| 0 | afternoon | 5008 |
| 1 | evening | 5923 |
sqldf("SELECT event_name, strftime('%m',event_datetime) AS month FROM scans")
| event_name | month | |
|---|---|---|
| 0 | LA Clippers 2021-22 Game 2 - Portland 10/25/21 | 10 |
| 1 | LA Clippers 2021-22 Game 2 - Portland 10/25/21 | 10 |
| 2 | LA Clippers 2021-22 Game 2 - Portland 10/25/21 | 10 |
| 3 | LA Clippers 2021-22 Game 2 - Portland 10/25/21 | 10 |
| 4 | LA Clippers 2021-22 Game 2 - Portland 10/25/21 | 10 |
| ... | ... | ... |
| 236465 | LA Clippers 2021-22 Game 41 - Oklahoma City 4/... | 04 |
| 236466 | LA Clippers 2021-22 Game 41 - Oklahoma City 4/... | 04 |
| 236467 | LA Clippers 2021-22 Game 41 - Oklahoma City 4/... | 04 |
| 236468 | LA Clippers 2021-22 Game 41 - Oklahoma City 4/... | 04 |
| 236469 | LA Clippers 2021-22 Game 33 - New York 3/6/22 | 03 |
236470 rows × 2 columns
#This is the code report the average attendance per game by month.
attendance_by_month = """
SELECT month,
CAST(COUNT(month)/COUNT(DISTINCT event_name) AS int) AS average_attendance
FROM
(SELECT event_name, strftime('%m',event_datetime) AS month FROM scans)
GROUP BY month; """
pysqldf(attendance_by_month)
| month | average_attendance | |
|---|---|---|
| 0 | 01 | 4414 |
| 1 | 02 | 6619 |
| 2 | 03 | 6555 |
| 3 | 04 | 6177 |
| 4 | 10 | 5700 |
| 5 | 11 | 5655 |
| 6 | 12 | 5559 |
pysqldf(attendance_by_month).plot.bar(x = 'month', y = 'average_attendance', rot=0)
<AxesSubplot:xlabel='month'>
average_num_of_fan_atTime = """
SELECT
AVG(twohrs_prior) AS 'avgFan_2hr_prior',
AVG(onehr_prior) AS 'avgFan_1hr_prior',
AVG(at_start) AS 'avgFan_at_start'
FROM(SELECT
event_name,
COUNT(CASE WHEN datetime(scan_datetime, '-2 hour') <= datetime(event_datetime) THEN 1 ELSE NULL END) AS 'at_start',
COUNT(CASE WHEN datetime(scan_datetime, '-2 hour') <= datetime(event_datetime, '-1 hour') THEN 1 ELSE NULL END) AS 'onehr_prior',
COUNT(CASE WHEN datetime(scan_datetime, '-2 hour') <= datetime(event_datetime, '-2 hour') THEN 1 ELSE NULL END) AS 'twohrs_prior'
FROM scans
GROUP BY event_name);
"""
pysqldf(average_num_of_fan_atTime)
| avgFan_2hr_prior | avgFan_1hr_prior | avgFan_at_start | |
|---|---|---|---|
| 0 | 2.073171 | 706.390244 | 3574.756098 |
member = pd.read_excel('LAC Data Challenge Fall 2022 - Data.xlsx', sheet_name='Customer Package Type')
sales = pd.read_excel('LAC Data Challenge Fall 2022 - Data.xlsx', sheet_name='Ticket Sales Data')
a = """
SELECT
customer_unique_id,
COUNT(DISTINCT seat_unique_id) AS 'ticket_purchase'
FROM sales
GROUP BY customer_unique_id;
"""
c = """
SELECT COUNT(CASE WHEN ticket_purchase >= 5 THEN 1 ELSE NULL END) AS 'purchase>=5'
FROM (
SELECT COUNT(DISTINCT seat_unique_id) AS 'ticket_purchase'
FROM sales
GROUP BY customer_unique_id);
"""
f = """
SELECT customer_unique_id, COUNT(DISTINCT seat_unique_id) AS 'ticket_scanned'
FROM(
SELECT sales.customer_unique_id, scans.seat_unique_id, sales.seat_unique_id
FROM scans
JOIN sales ON scans.seat_unique_id = sales.seat_unique_id)
GROUP BY customer_unique_id;
"""
h = """
SELECT COUNT(CASE WHEN ticket_scanned >= 3 THEN 1 ELSE NULL END) AS 'scanned>=3'
FROM (
SELECT COUNT(DISTINCT seat_unique_id) AS 'ticket_scanned'
FROM (
SELECT sales.customer_unique_id, scans.seat_unique_id, sales.seat_unique_id
FROM scans
JOIN sales ON scans.seat_unique_id = sales.seat_unique_id)
GROUP BY customer_unique_id);
"""
#Below is the complete code to show each customer_unique_id with purchase>=5 and scan >=3
i = """
SELECT a.customer_unique_id, b.ticket_scanned, a.ticket_purchase
FROM(
SELECT customer_unique_id, COUNT(DISTINCT seat_unique_id) AS 'ticket_purchase'
FROM sales
GROUP BY customer_unique_id) a
JOIN(
SELECT customer_unique_id, COUNT(DISTINCT seat_unique_id) AS 'ticket_scanned'
FROM(
SELECT sales.customer_unique_id, scans.seat_unique_id, sales.seat_unique_id
FROM scans
JOIN sales ON scans.seat_unique_id = sales.seat_unique_id)
GROUP BY customer_unique_id) b
ON a.customer_unique_id = b.customer_unique_id
WHERE b.ticket_scanned >= 3 AND a.ticket_purchase >= 5;
"""
pysqldf(i)
| customer_unique_id | ticket_scanned | ticket_purchase | |
|---|---|---|---|
| 0 | 124908643:10017873 | 36 | 40 |
| 1 | 124908643:10067883 | 42 | 46 |
| 2 | 124908643:10072874 | 32 | 40 |
| 3 | 124908643:10081887 | 62 | 80 |
| 4 | 124908643:10191875 | 56 | 82 |
| ... | ... | ... | ... |
| 3878 | 124908643:9748886 | 14 | 44 |
| 3879 | 124908643:9803914 | 78 | 82 |
| 3880 | 124908643:9829877 | 139 | 164 |
| 3881 | 124908643:9956873 | 16 | 24 |
| 3882 | 124908643:9992875 | 27 | 40 |
3883 rows × 3 columns
num_account_df = pysqldf(i)
num_account_df.head()
| customer_unique_id | ticket_scanned | ticket_purchase | |
|---|---|---|---|
| 0 | 124908643:10017873 | 36 | 40 |
| 1 | 124908643:10067883 | 42 | 46 |
| 2 | 124908643:10072874 | 32 | 40 |
| 3 | 124908643:10081887 | 62 | 80 |
| 4 | 124908643:10191875 | 56 | 82 |
#Count the member account with purchase >= 5 and scanned >=3
num_member = """
SELECT COUNT(m.customer_unique_id) AS member_account
FROM member m
JOIN num_account_df n
ON m.customer_unique_id = n.customer_unique_id;
"""
pysqldf(num_member)
| member_account | |
|---|---|
| 0 | 2411 |
seating = pd.read_excel('LAC Data Challenge Fall 2022 - Data.xlsx', sheet_name='Seating Chart')
#Calculate the capacity for each section:
cap = """
SELECT section_name, SUM(seat_count) AS section_capacity
FROM seating
GROUP BY section_name;
"""
capacity = pysqldf(cap)
capacity.head()
| section_name | section_capacity | |
|---|---|---|
| 0 | 101 | 388 |
| 1 | 102 | 393 |
| 2 | 103 | 204 |
| 3 | 104 | 126 |
| 4 | 105 | 254 |
#Calculate the attendance for each section by game on average:
sec = """
SELECT section_name, SUM(section_att)/COUNT(DISTINCT event_name) AS 'avg_sec_att'
FROM(
SELECT event_name, section_name, COUNT(seat_unique_id) AS 'section_att'
FROM scans
GROUP BY event_name, section_name)
GROUP BY section_name;
"""
att_by_section = pysqldf(sec)
att_by_section.head()
| section_name | avg_sec_att | |
|---|---|---|
| 0 | 101 | 273 |
| 1 | 102 | 292 |
| 2 | 103 | 139 |
| 3 | 104 | 86 |
| 4 | 105 | 187 |
#Merge 2 tables and calculate the fit capacity of each section
fit_capacity = capacity.merge(att_by_section, how = 'left', on = 'section_name')
fit_capacity['percentage_cap'] = fit_capacity['avg_sec_att']/fit_capacity['section_capacity']
fit_capacity.nlargest(n=10, columns = ['percentage_cap'])
| section_name | section_capacity | avg_sec_att | percentage_cap | |
|---|---|---|---|---|
| 5 | 106 | 502 | 374 | 0.745020 |
| 1 | 102 | 393 | 292 | 0.743003 |
| 4 | 105 | 254 | 187 | 0.736220 |
| 13 | 114 | 300 | 219 | 0.730000 |
| 6 | 107 | 473 | 345 | 0.729387 |
| 12 | 113 | 179 | 130 | 0.726257 |
| 14 | 115 | 478 | 346 | 0.723849 |
| 8 | 109 | 174 | 124 | 0.712644 |
| 0 | 101 | 388 | 273 | 0.703608 |
| 15 | 116 | 522 | 364 | 0.697318 |
#Sales/attendance for each game
sale_att = """
SELECT a.event_name, date(b.event_datetime) AS 'event_date', game_sales, game_attendance
FROM(
SELECT event_name, COUNT(seat_unique_id) AS 'game_attendance'
FROM scans
GROUP BY event_name) a
JOIN(
SELECT event_name, event_datetime, COUNT(seat_unique_id) AS 'game_sales'
FROM sales
GROUP BY event_name) b
ON a.event_name = b.event_name;
"""
sales_att_by_game = pysqldf(sale_att)
sales_att_by_game = sales_att_by_game.sort_values(by='event_date')
import re
#split the event_name column with only the city names
sales_att_by_game['event_name'].str.split("-", expand = True)[2]
cities = [item[0] for item in [re.split(r'(\d+)', s) for s in sales_att_by_game['event_name'].str.split("-", expand = True)[2]]]
#Clear the space in the city string
cities = [city.strip() for city in cities]
sales_att_by_game['City'] = cities
sales_att_by_game
| event_name | event_date | game_sales | game_attendance | City | |
|---|---|---|---|---|---|
| 0 | LA Clippers 2021-22 Game 1 - Memphis 10/23/21 | 2021-10-23 | 7934 | 6498 | Memphis |
| 11 | LA Clippers 2021-22 Game 2 - Portland 10/25/21 | 2021-10-25 | 7417 | 5738 | Portland |
| 22 | LA Clippers 2021-22 Game 3 - Cleveland 10/27/21 | 2021-10-27 | 7044 | 4866 | Cleveland |
| 33 | LA Clippers 2021-22 Game 4 - Oklahoma City 11/... | 2021-11-01 | 7142 | 4784 | Oklahoma City |
| 36 | LA Clippers 2021-22 Game 5 - Charlotte 11/7/21 | 2021-11-07 | 7838 | 6303 | Charlotte |
| 37 | LA Clippers 2021-22 Game 6 - Portland 11/9/21 | 2021-11-09 | 7339 | 5531 | Portland |
| 38 | LA Clippers 2021-22 Game 7 - Miami 11/11/21 | 2021-11-11 | 7340 | 6200 | Miami |
| 39 | LA Clippers 2021-22 Game 8 - Minnesota 11/13/21 | 2021-11-13 | 7731 | 6045 | Minnesota |
| 40 | LA Clippers 2021-22 Game 9 - Chicago 11/14/21 | 2021-11-14 | 7888 | 6789 | Chicago |
| 1 | LA Clippers 2021-22 Game 10 - San Antonio 11/1... | 2021-11-16 | 7126 | 4928 | San Antonio |
| 2 | LA Clippers 2021-22 Game 11 - Dallas 11/21/21 | 2021-11-21 | 6832 | 5085 | Dallas |
| 3 | LA Clippers 2021-22 Game 12 - Dallas 11/23/21 | 2021-11-23 | 7339 | 5903 | Dallas |
| 4 | LA Clippers 2021-22 Game 13 - Detroit 11/26/21 | 2021-11-26 | 7477 | 5210 | Detroit |
| 5 | LA Clippers 2021-22 Game 14 - Golden State 11/... | 2021-11-28 | 7622 | 6832 | Golden State |
| 6 | LA Clippers 2021-22 Game 15 - New Orleans11/29/21 | 2021-11-29 | 6713 | 4257 | New Orleans |
| 7 | LA Clippers 2021-22 Game 16 - Sacramento 12/1/21 | 2021-12-01 | 6747 | 4558 | Sacramento |
| 8 | LA Clippers 2021-22 Game 17 - Boston 12/8/21 | 2021-12-08 | 7456 | 6179 | Boston |
| 9 | LA Clippers 2021-22 Game 18 - Orlando 12/11/21 | 2021-12-11 | 7361 | 4814 | Orlando |
| 10 | LA Clippers 2021-22 Game 19 - Phoenix 12/13/21 | 2021-12-13 | 7678 | 6190 | Phoenix |
| 12 | LA Clippers 2021-22 Game 20 - San Antonio 12/2... | 2021-12-20 | 7835 | 5778 | San Antonio |
| 13 | LA Clippers 2021-22 Game 21 - Denver 12/26/21 | 2021-12-26 | 7399 | 5566 | Denver |
| 14 | LA Clippers 2021-22 Game 22 - Brooklyn 12/27/21 | 2021-12-27 | 7497 | 5829 | Brooklyn |
| 15 | LA Clippers 2021-22 Game 23 - Minnesota 1/3/22 | 2022-01-03 | 7170 | 4587 | Minnesota |
| 16 | LA Clippers 2021-22 Game 24 - Memphis 1/8/22 | 2022-01-08 | 7591 | 4850 | Memphis |
| 17 | LA Clippers 2021-22 Game 25 - Atlanta 1/9/22 | 2022-01-09 | 6830 | 4053 | Atlanta |
| 18 | LA Clippers 2021-22 Game 26 - Denver 1/11/22 | 2022-01-11 | 6921 | 4369 | Denver |
| 19 | LA Clippers 2021-22 Game 27 - Indiana 1/17/22 | 2022-01-17 | 6901 | 4212 | Indiana |
| 20 | LA Clippers 2021-22 Game 28 - Los Angeles 2/3/22 | 2022-02-03 | 7610 | 6913 | Los Angeles |
| 21 | LA Clippers 2021-22 Game 29 - Milwaukee 2/6/22 | 2022-02-06 | 7678 | 6403 | Milwaukee |
| 23 | LA Clippers 2021-22 Game 30 - Golden State 2/1... | 2022-02-14 | 8033 | 7311 | Golden State |
| 24 | LA Clippers 2021-22 Game 31 - Houston 2/17/22 | 2022-02-17 | 8006 | 5849 | Houston |
| 25 | LA Clippers 2021-22 Game 32 - Los Angeles 3/3/22 | 2022-03-03 | 8073 | 7376 | Los Angeles |
| 26 | LA Clippers 2021-22 Game 33 - New York 3/6/22 | 2022-03-06 | 7925 | 6198 | New York |
| 27 | LA Clippers 2021-22 Game 34 - Washington 3/9/22 | 2022-03-09 | 7734 | 5629 | Washington |
| 28 | LA Clippers 2021-22 Game 35 - Toronto 3/16/22 | 2022-03-16 | 8189 | 6774 | Toronto |
| 29 | LA Clippers 2021-22 Game 36 - Philadelphia 3/2... | 2022-03-25 | 8178 | 7162 | Philadelphia |
| 30 | LA Clippers 2021-22 Game 37 - Utah 3/29/22 | 2022-03-29 | 7853 | 6192 | Utah |
| 31 | LA Clippers 2021-22 Game 38 - New Orleans 4/3/22 | 2022-04-03 | 7586 | 6164 | New Orleans |
| 32 | LA Clippers 2021-22 Game 39 - Phoenix 4/6/22 | 2022-04-06 | 7850 | 6631 | Phoenix |
| 34 | LA Clippers 2021-22 Game 40 - Sacramento 4/9/22 | 2022-04-09 | 7685 | 5942 | Sacramento |
| 35 | LA Clippers 2021-22 Game 41 - Oklahoma City 4/... | 2022-04-10 | 7534 | 5972 | Oklahoma City |
team = pd.read_excel('LAC Data Challenge Fall 2022 - Data.xlsx', sheet_name='Team Data')
#Because only for the opponent teams, drop our home team
team1 = team.drop(12)
#Merge datasets
team2 = team1.merge(sales_att_by_game, on='City')
vegas_attendance = team2[["Team", "Vegas Over/Under 21/22", "game_attendance"]]
#Calculate the correlation
vegas_attendance['Vegas Over/Under 21/22'].corr(vegas_attendance['game_attendance'])
0.3690724620298187
#Make the best-fit plot
import matplotlib.pyplot as plt
import seaborn as sns
sns.lmplot(x='Vegas Over/Under 21/22', y='game_attendance', data=vegas_attendance)
<seaborn.axisgrid.FacetGrid at 0x7fd4794f2130>
#Code for scraping the Basketball Reference and pulling in data
url = 'https://www.basketball-reference.com/teams/LAC/2022_games.html'
d = pd.read_html(url)[0]
result = pd.DataFrame(d)
#rename the columns and drop the useless columns
result.rename( columns={'Unnamed: 5':'homeaway', 'Unnamed: 7':'W/L','Unnamed: 8': 'OT'}, inplace=True )
result = result[result['Opponent'] != 'Opponent']
del result['Unnamed: 3']
del result['Unnamed: 4']
result.head()
| G | Date | Start (ET) | homeaway | Opponent | W/L | OT | Tm | Opp | W | L | Streak | Notes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Thu, Oct 21, 2021 | 10:00p | @ | Golden State Warriors | L | NaN | 113 | 115 | 0 | 1 | L 1 | NaN |
| 1 | 2 | Sat, Oct 23, 2021 | 10:30p | NaN | Memphis Grizzlies | L | NaN | 114 | 120 | 0 | 2 | L 2 | NaN |
| 2 | 3 | Mon, Oct 25, 2021 | 10:30p | NaN | Portland Trail Blazers | W | NaN | 116 | 86 | 1 | 2 | W 1 | NaN |
| 3 | 4 | Wed, Oct 27, 2021 | 10:30p | NaN | Cleveland Cavaliers | L | NaN | 79 | 92 | 1 | 3 | L 1 | NaN |
| 4 | 5 | Fri, Oct 29, 2021 | 10:00p | @ | Portland Trail Blazers | L | NaN | 92 | 111 | 1 | 4 | L 2 | NaN |
#Modify the date format in order to merge the table with ticket purchase and attendance.
result1 = result
#Filter the 4 W3 Games
df_w3 = result1[(result1['Streak'] == "W 3")]
df_w3
| G | Date | Start (ET) | homeaway | Opponent | W/L | OT | Tm | Opp | W | L | Streak | Notes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 7 | 8 | Fri, Nov 5, 2021 | 8:00p | @ | Minnesota Timberwolves | W | NaN | 104 | 84 | 4 | 4 | W 3 | NaN |
| 27 | 27 | Sat, Dec 11, 2021 | 3:30p | NaN | Orlando Magic | W | NaN | 106 | 104 | 15 | 12 | W 3 | NaN |
| 65 | 63 | Sun, Feb 27, 2022 | 7:00p | @ | Houston Rockets | W | NaN | 99 | 98 | 32 | 31 | W 3 | NaN |
| 82 | 80 | Wed, Apr 6, 2022 | 10:00p | NaN | Phoenix Suns | W | NaN | 113 | 109 | 40 | 40 | W 3 | NaN |
#Filter the 8 home games after W3 games
df_afterw3 = result1.filter(items = [8,9,10,28,29,30,66,67,68,83,84,85], axis=0)
df_8games = df_afterw3[(df_afterw3['homeaway'].isnull())]
df_8games
| G | Date | Start (ET) | homeaway | Opponent | W/L | OT | Tm | Opp | W | L | Streak | Notes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 8 | 9 | Sun, Nov 7, 2021 | 9:00p | NaN | Charlotte Hornets | W | NaN | 120 | 106 | 5 | 4 | W 4 | NaN |
| 9 | 10 | Tue, Nov 9, 2021 | 10:00p | NaN | Portland Trail Blazers | W | NaN | 117 | 109 | 6 | 4 | W 5 | NaN |
| 10 | 11 | Thu, Nov 11, 2021 | 10:30p | NaN | Miami Heat | W | NaN | 112 | 109 | 7 | 4 | W 6 | NaN |
| 28 | 28 | Mon, Dec 13, 2021 | 10:30p | NaN | Phoenix Suns | W | NaN | 111 | 95 | 16 | 12 | W 4 | NaN |
| 67 | 65 | Thu, Mar 3, 2022 | 10:00p | NaN | Los Angeles Lakers | W | NaN | 132 | 111 | 34 | 31 | W 5 | NaN |
| 68 | 66 | Sun, Mar 6, 2022 | 10:00p | NaN | New York Knicks | L | NaN | 93 | 116 | 34 | 32 | L 1 | NaN |
| 84 | 81 | Sat, Apr 9, 2022 | 9:30p | NaN | Sacramento Kings | W | NaN | 117 | 98 | 41 | 40 | W 4 | NaN |
| 85 | 82 | Sun, Apr 10, 2022 | 9:30p | NaN | Oklahoma City Thunder | W | NaN | 138 | 88 | 42 | 40 | W 5 | NaN |
#Filter all the home games
df_homegames = result1[(result1['homeaway'].isnull()) & (result['Notes'] != 'Play-In Game')]
df_homegames.head()
| G | Date | Start (ET) | homeaway | Opponent | W/L | OT | Tm | Opp | W | L | Streak | Notes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 2 | Sat, Oct 23, 2021 | 10:30p | NaN | Memphis Grizzlies | L | NaN | 114 | 120 | 0 | 2 | L 2 | NaN |
| 2 | 3 | Mon, Oct 25, 2021 | 10:30p | NaN | Portland Trail Blazers | W | NaN | 116 | 86 | 1 | 2 | W 1 | NaN |
| 3 | 4 | Wed, Oct 27, 2021 | 10:30p | NaN | Cleveland Cavaliers | L | NaN | 79 | 92 | 1 | 3 | L 1 | NaN |
| 5 | 6 | Mon, Nov 1, 2021 | 10:30p | NaN | Oklahoma City Thunder | W | NaN | 99 | 94 | 2 | 4 | W 1 | NaN |
| 8 | 9 | Sun, Nov 7, 2021 | 9:00p | NaN | Charlotte Hornets | W | NaN | 120 | 106 | 5 | 4 | W 4 | NaN |
#Filter the 33 home games
df_8games['streak'] = 'streak'
df_8games.set_index('streak', append=True, inplace=True)
df_homegames['other'] = 'other'
df_homegames.set_index('other', append=True, inplace=True)
merged = df_8games.append(df_homegames)
merged = merged.drop_duplicates().sort_index()
idx = pd.IndexSlice
df_33games = merged.loc[idx[:, 'other'], :]
df_33games.index
df_33games = df_33games.reset_index(drop=True)
df_33games
<ipython-input-505-901245bade2d>:2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy <ipython-input-505-901245bade2d>:4: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
| G | Date | Start (ET) | homeaway | Opponent | W/L | OT | Tm | Opp | W | L | Streak | Notes | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2 | Sat, Oct 23, 2021 | 10:30p | NaN | Memphis Grizzlies | L | NaN | 114 | 120 | 0 | 2 | L 2 | NaN |
| 1 | 3 | Mon, Oct 25, 2021 | 10:30p | NaN | Portland Trail Blazers | W | NaN | 116 | 86 | 1 | 2 | W 1 | NaN |
| 2 | 4 | Wed, Oct 27, 2021 | 10:30p | NaN | Cleveland Cavaliers | L | NaN | 79 | 92 | 1 | 3 | L 1 | NaN |
| 3 | 6 | Mon, Nov 1, 2021 | 10:30p | NaN | Oklahoma City Thunder | W | NaN | 99 | 94 | 2 | 4 | W 1 | NaN |
| 4 | 12 | Sat, Nov 13, 2021 | 10:30p | NaN | Minnesota Timberwolves | W | NaN | 129 | 102 | 8 | 4 | W 7 | NaN |
| 5 | 13 | Sun, Nov 14, 2021 | 9:30p | NaN | Chicago Bulls | L | NaN | 90 | 100 | 8 | 5 | L 1 | NaN |
| 6 | 14 | Tue, Nov 16, 2021 | 10:30p | NaN | San Antonio Spurs | W | NaN | 106 | 92 | 9 | 5 | W 1 | NaN |
| 7 | 17 | Sun, Nov 21, 2021 | 3:30p | NaN | Dallas Mavericks | W | NaN | 97 | 91 | 10 | 7 | W 1 | NaN |
| 8 | 18 | Tue, Nov 23, 2021 | 10:30p | NaN | Dallas Mavericks | L | OT | 104 | 112 | 10 | 8 | L 1 | NaN |
| 9 | 19 | Fri, Nov 26, 2021 | 3:30p | NaN | Detroit Pistons | W | NaN | 107 | 96 | 11 | 8 | W 1 | NaN |
| 10 | 20 | Sun, Nov 28, 2021 | 3:30p | NaN | Golden State Warriors | L | NaN | 90 | 105 | 11 | 9 | L 1 | NaN |
| 11 | 21 | Mon, Nov 29, 2021 | 10:30p | NaN | New Orleans Pelicans | L | NaN | 104 | 123 | 11 | 10 | L 2 | NaN |
| 12 | 22 | Wed, Dec 1, 2021 | 10:30p | NaN | Sacramento Kings | L | NaN | 115 | 124 | 11 | 11 | L 3 | NaN |
| 13 | 26 | Wed, Dec 8, 2021 | 10:30p | NaN | Boston Celtics | W | NaN | 114 | 111 | 14 | 12 | W 2 | NaN |
| 14 | 27 | Sat, Dec 11, 2021 | 3:30p | NaN | Orlando Magic | W | NaN | 106 | 104 | 15 | 12 | W 3 | NaN |
| 15 | 31 | Mon, Dec 20, 2021 | 10:30p | NaN | San Antonio Spurs | L | NaN | 92 | 116 | 16 | 15 | L 3 | NaN |
| 16 | 33 | Sun, Dec 26, 2021 | 9:00p | NaN | Denver Nuggets | L | NaN | 100 | 103 | 17 | 16 | L 1 | NaN |
| 17 | 34 | Mon, Dec 27, 2021 | 10:30p | NaN | Brooklyn Nets | L | NaN | 108 | 124 | 17 | 17 | L 2 | NaN |
| 18 | 38 | Mon, Jan 3, 2022 | 10:30p | NaN | Minnesota Timberwolves | L | NaN | 104 | 122 | 19 | 19 | L 1 | NaN |
| 19 | 40 | Sat, Jan 8, 2022 | 3:30p | NaN | Memphis Grizzlies | L | NaN | 108 | 123 | 19 | 21 | L 3 | NaN |
| 20 | 41 | Sun, Jan 9, 2022 | 3:30p | NaN | Atlanta Hawks | W | NaN | 106 | 93 | 20 | 21 | W 1 | NaN |
| 21 | 42 | Tue, Jan 11, 2022 | 10:30p | NaN | Denver Nuggets | W | NaN | 87 | 85 | 21 | 21 | W 2 | NaN |
| 22 | 45 | Mon, Jan 17, 2022 | 3:30p | NaN | Indiana Pacers | W | NaN | 139 | 133 | 22 | 23 | W 1 | NaN |
| 23 | 54 | Thu, Feb 3, 2022 | 10:00p | NaN | Los Angeles Lakers | W | NaN | 111 | 110 | 27 | 27 | W 1 | NaN |
| 24 | 55 | Sun, Feb 6, 2022 | 9:00p | NaN | Milwaukee Bucks | L | NaN | 113 | 137 | 27 | 28 | L 1 | NaN |
| 25 | 59 | Mon, Feb 14, 2022 | 10:30p | NaN | Golden State Warriors | W | NaN | 119 | 104 | 29 | 30 | W 2 | NaN |
| 26 | 61 | Thu, Feb 17, 2022 | 10:30p | NaN | Houston Rockets | W | NaN | 142 | 111 | 30 | 31 | W 1 | NaN |
| 27 | 68 | Wed, Mar 9, 2022 | 10:30p | NaN | Washington Wizards | W | NaN | 115 | 109 | 35 | 33 | W 1 | NaN |
| 28 | 72 | Wed, Mar 16, 2022 | 10:30p | NaN | Toronto Raptors | L | NaN | 100 | 103 | 36 | 36 | L 2 | NaN |
| 29 | 75 | Fri, Mar 25, 2022 | 10:30p | NaN | Philadelphia 76ers | L | NaN | 97 | 122 | 36 | 39 | L 5 | NaN |
| 30 | 76 | Tue, Mar 29, 2022 | 10:00p | NaN | Utah Jazz | W | NaN | 121 | 115 | 37 | 39 | W 1 | NaN |
| 31 | 79 | Sun, Apr 3, 2022 | 9:30p | NaN | New Orleans Pelicans | W | NaN | 119 | 100 | 39 | 40 | W 2 | NaN |
| 32 | 80 | Wed, Apr 6, 2022 | 10:00p | NaN | Phoenix Suns | W | NaN | 113 | 109 | 40 | 40 | W 3 | NaN |
#Merge datasets with 8 games
from datetime import datetime
df_8games['event_date'] = df_8games['Date'].str.split(",", expand = True).drop(columns = 0)[1]+df_8games['Date'].str.split(",", expand = True).drop(columns = 0)[2]
df_8games['event_date'].astype(str)
df_8games['event_date'] = pd.to_datetime(df_8games['event_date']).astype(str)
#Merge datasets
#del df_8games['Date'] #Drop the useless column,already has event_date
new_df_8games = df_8games.merge(sales_att_by_game, on='event_date')
new_df_8games
<ipython-input-506-a125996dc749>:3: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy <ipython-input-506-a125996dc749>:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
| G | Date | Start (ET) | homeaway | Opponent | W/L | OT | Tm | Opp | W | L | Streak | Notes | event_date | event_name | game_sales | game_attendance | City | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 9 | Sun, Nov 7, 2021 | 9:00p | NaN | Charlotte Hornets | W | NaN | 120 | 106 | 5 | 4 | W 4 | NaN | 2021-11-07 | LA Clippers 2021-22 Game 5 - Charlotte 11/7/21 | 7838 | 6303 | Charlotte |
| 1 | 10 | Tue, Nov 9, 2021 | 10:00p | NaN | Portland Trail Blazers | W | NaN | 117 | 109 | 6 | 4 | W 5 | NaN | 2021-11-09 | LA Clippers 2021-22 Game 6 - Portland 11/9/21 | 7339 | 5531 | Portland |
| 2 | 11 | Thu, Nov 11, 2021 | 10:30p | NaN | Miami Heat | W | NaN | 112 | 109 | 7 | 4 | W 6 | NaN | 2021-11-11 | LA Clippers 2021-22 Game 7 - Miami 11/11/21 | 7340 | 6200 | Miami |
| 3 | 28 | Mon, Dec 13, 2021 | 10:30p | NaN | Phoenix Suns | W | NaN | 111 | 95 | 16 | 12 | W 4 | NaN | 2021-12-13 | LA Clippers 2021-22 Game 19 - Phoenix 12/13/21 | 7678 | 6190 | Phoenix |
| 4 | 65 | Thu, Mar 3, 2022 | 10:00p | NaN | Los Angeles Lakers | W | NaN | 132 | 111 | 34 | 31 | W 5 | NaN | 2022-03-03 | LA Clippers 2021-22 Game 32 - Los Angeles 3/3/22 | 8073 | 7376 | Los Angeles |
| 5 | 66 | Sun, Mar 6, 2022 | 10:00p | NaN | New York Knicks | L | NaN | 93 | 116 | 34 | 32 | L 1 | NaN | 2022-03-06 | LA Clippers 2021-22 Game 33 - New York 3/6/22 | 7925 | 6198 | New York |
| 6 | 81 | Sat, Apr 9, 2022 | 9:30p | NaN | Sacramento Kings | W | NaN | 117 | 98 | 41 | 40 | W 4 | NaN | 2022-04-09 | LA Clippers 2021-22 Game 40 - Sacramento 4/9/22 | 7685 | 5942 | Sacramento |
| 7 | 82 | Sun, Apr 10, 2022 | 9:30p | NaN | Oklahoma City Thunder | W | NaN | 138 | 88 | 42 | 40 | W 5 | NaN | 2022-04-10 | LA Clippers 2021-22 Game 41 - Oklahoma City 4/... | 7534 | 5972 | Oklahoma City |
#Merge datasets with 33 games
from datetime import datetime
df_33games['event_date'] = df_33games['Date'].str.split(",", expand = True).drop(columns = 0)[1]+df_33games['Date'].str.split(",", expand = True).drop(columns = 0)[2]
#result1['event_date'].astype(str)
df_33games['event_date'] = pd.to_datetime(df_33games['event_date']).astype(str)
#Merge datasets
#del df_33games['Date']
new_df_33games = df_33games.merge(sales_att_by_game, on='event_date')
new_df_33games
| G | Date | Start (ET) | homeaway | Opponent | W/L | OT | Tm | Opp | W | L | Streak | Notes | event_date | event_name | game_sales | game_attendance | City | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2 | Sat, Oct 23, 2021 | 10:30p | NaN | Memphis Grizzlies | L | NaN | 114 | 120 | 0 | 2 | L 2 | NaN | 2021-10-23 | LA Clippers 2021-22 Game 1 - Memphis 10/23/21 | 7934 | 6498 | Memphis |
| 1 | 3 | Mon, Oct 25, 2021 | 10:30p | NaN | Portland Trail Blazers | W | NaN | 116 | 86 | 1 | 2 | W 1 | NaN | 2021-10-25 | LA Clippers 2021-22 Game 2 - Portland 10/25/21 | 7417 | 5738 | Portland |
| 2 | 4 | Wed, Oct 27, 2021 | 10:30p | NaN | Cleveland Cavaliers | L | NaN | 79 | 92 | 1 | 3 | L 1 | NaN | 2021-10-27 | LA Clippers 2021-22 Game 3 - Cleveland 10/27/21 | 7044 | 4866 | Cleveland |
| 3 | 6 | Mon, Nov 1, 2021 | 10:30p | NaN | Oklahoma City Thunder | W | NaN | 99 | 94 | 2 | 4 | W 1 | NaN | 2021-11-01 | LA Clippers 2021-22 Game 4 - Oklahoma City 11/... | 7142 | 4784 | Oklahoma City |
| 4 | 12 | Sat, Nov 13, 2021 | 10:30p | NaN | Minnesota Timberwolves | W | NaN | 129 | 102 | 8 | 4 | W 7 | NaN | 2021-11-13 | LA Clippers 2021-22 Game 8 - Minnesota 11/13/21 | 7731 | 6045 | Minnesota |
| 5 | 13 | Sun, Nov 14, 2021 | 9:30p | NaN | Chicago Bulls | L | NaN | 90 | 100 | 8 | 5 | L 1 | NaN | 2021-11-14 | LA Clippers 2021-22 Game 9 - Chicago 11/14/21 | 7888 | 6789 | Chicago |
| 6 | 14 | Tue, Nov 16, 2021 | 10:30p | NaN | San Antonio Spurs | W | NaN | 106 | 92 | 9 | 5 | W 1 | NaN | 2021-11-16 | LA Clippers 2021-22 Game 10 - San Antonio 11/1... | 7126 | 4928 | San Antonio |
| 7 | 17 | Sun, Nov 21, 2021 | 3:30p | NaN | Dallas Mavericks | W | NaN | 97 | 91 | 10 | 7 | W 1 | NaN | 2021-11-21 | LA Clippers 2021-22 Game 11 - Dallas 11/21/21 | 6832 | 5085 | Dallas |
| 8 | 18 | Tue, Nov 23, 2021 | 10:30p | NaN | Dallas Mavericks | L | OT | 104 | 112 | 10 | 8 | L 1 | NaN | 2021-11-23 | LA Clippers 2021-22 Game 12 - Dallas 11/23/21 | 7339 | 5903 | Dallas |
| 9 | 19 | Fri, Nov 26, 2021 | 3:30p | NaN | Detroit Pistons | W | NaN | 107 | 96 | 11 | 8 | W 1 | NaN | 2021-11-26 | LA Clippers 2021-22 Game 13 - Detroit 11/26/21 | 7477 | 5210 | Detroit |
| 10 | 20 | Sun, Nov 28, 2021 | 3:30p | NaN | Golden State Warriors | L | NaN | 90 | 105 | 11 | 9 | L 1 | NaN | 2021-11-28 | LA Clippers 2021-22 Game 14 - Golden State 11/... | 7622 | 6832 | Golden State |
| 11 | 21 | Mon, Nov 29, 2021 | 10:30p | NaN | New Orleans Pelicans | L | NaN | 104 | 123 | 11 | 10 | L 2 | NaN | 2021-11-29 | LA Clippers 2021-22 Game 15 - New Orleans11/29/21 | 6713 | 4257 | New Orleans |
| 12 | 22 | Wed, Dec 1, 2021 | 10:30p | NaN | Sacramento Kings | L | NaN | 115 | 124 | 11 | 11 | L 3 | NaN | 2021-12-01 | LA Clippers 2021-22 Game 16 - Sacramento 12/1/21 | 6747 | 4558 | Sacramento |
| 13 | 26 | Wed, Dec 8, 2021 | 10:30p | NaN | Boston Celtics | W | NaN | 114 | 111 | 14 | 12 | W 2 | NaN | 2021-12-08 | LA Clippers 2021-22 Game 17 - Boston 12/8/21 | 7456 | 6179 | Boston |
| 14 | 27 | Sat, Dec 11, 2021 | 3:30p | NaN | Orlando Magic | W | NaN | 106 | 104 | 15 | 12 | W 3 | NaN | 2021-12-11 | LA Clippers 2021-22 Game 18 - Orlando 12/11/21 | 7361 | 4814 | Orlando |
| 15 | 31 | Mon, Dec 20, 2021 | 10:30p | NaN | San Antonio Spurs | L | NaN | 92 | 116 | 16 | 15 | L 3 | NaN | 2021-12-20 | LA Clippers 2021-22 Game 20 - San Antonio 12/2... | 7835 | 5778 | San Antonio |
| 16 | 33 | Sun, Dec 26, 2021 | 9:00p | NaN | Denver Nuggets | L | NaN | 100 | 103 | 17 | 16 | L 1 | NaN | 2021-12-26 | LA Clippers 2021-22 Game 21 - Denver 12/26/21 | 7399 | 5566 | Denver |
| 17 | 34 | Mon, Dec 27, 2021 | 10:30p | NaN | Brooklyn Nets | L | NaN | 108 | 124 | 17 | 17 | L 2 | NaN | 2021-12-27 | LA Clippers 2021-22 Game 22 - Brooklyn 12/27/21 | 7497 | 5829 | Brooklyn |
| 18 | 38 | Mon, Jan 3, 2022 | 10:30p | NaN | Minnesota Timberwolves | L | NaN | 104 | 122 | 19 | 19 | L 1 | NaN | 2022-01-03 | LA Clippers 2021-22 Game 23 - Minnesota 1/3/22 | 7170 | 4587 | Minnesota |
| 19 | 40 | Sat, Jan 8, 2022 | 3:30p | NaN | Memphis Grizzlies | L | NaN | 108 | 123 | 19 | 21 | L 3 | NaN | 2022-01-08 | LA Clippers 2021-22 Game 24 - Memphis 1/8/22 | 7591 | 4850 | Memphis |
| 20 | 41 | Sun, Jan 9, 2022 | 3:30p | NaN | Atlanta Hawks | W | NaN | 106 | 93 | 20 | 21 | W 1 | NaN | 2022-01-09 | LA Clippers 2021-22 Game 25 - Atlanta 1/9/22 | 6830 | 4053 | Atlanta |
| 21 | 42 | Tue, Jan 11, 2022 | 10:30p | NaN | Denver Nuggets | W | NaN | 87 | 85 | 21 | 21 | W 2 | NaN | 2022-01-11 | LA Clippers 2021-22 Game 26 - Denver 1/11/22 | 6921 | 4369 | Denver |
| 22 | 45 | Mon, Jan 17, 2022 | 3:30p | NaN | Indiana Pacers | W | NaN | 139 | 133 | 22 | 23 | W 1 | NaN | 2022-01-17 | LA Clippers 2021-22 Game 27 - Indiana 1/17/22 | 6901 | 4212 | Indiana |
| 23 | 54 | Thu, Feb 3, 2022 | 10:00p | NaN | Los Angeles Lakers | W | NaN | 111 | 110 | 27 | 27 | W 1 | NaN | 2022-02-03 | LA Clippers 2021-22 Game 28 - Los Angeles 2/3/22 | 7610 | 6913 | Los Angeles |
| 24 | 55 | Sun, Feb 6, 2022 | 9:00p | NaN | Milwaukee Bucks | L | NaN | 113 | 137 | 27 | 28 | L 1 | NaN | 2022-02-06 | LA Clippers 2021-22 Game 29 - Milwaukee 2/6/22 | 7678 | 6403 | Milwaukee |
| 25 | 59 | Mon, Feb 14, 2022 | 10:30p | NaN | Golden State Warriors | W | NaN | 119 | 104 | 29 | 30 | W 2 | NaN | 2022-02-14 | LA Clippers 2021-22 Game 30 - Golden State 2/1... | 8033 | 7311 | Golden State |
| 26 | 61 | Thu, Feb 17, 2022 | 10:30p | NaN | Houston Rockets | W | NaN | 142 | 111 | 30 | 31 | W 1 | NaN | 2022-02-17 | LA Clippers 2021-22 Game 31 - Houston 2/17/22 | 8006 | 5849 | Houston |
| 27 | 68 | Wed, Mar 9, 2022 | 10:30p | NaN | Washington Wizards | W | NaN | 115 | 109 | 35 | 33 | W 1 | NaN | 2022-03-09 | LA Clippers 2021-22 Game 34 - Washington 3/9/22 | 7734 | 5629 | Washington |
| 28 | 72 | Wed, Mar 16, 2022 | 10:30p | NaN | Toronto Raptors | L | NaN | 100 | 103 | 36 | 36 | L 2 | NaN | 2022-03-16 | LA Clippers 2021-22 Game 35 - Toronto 3/16/22 | 8189 | 6774 | Toronto |
| 29 | 75 | Fri, Mar 25, 2022 | 10:30p | NaN | Philadelphia 76ers | L | NaN | 97 | 122 | 36 | 39 | L 5 | NaN | 2022-03-25 | LA Clippers 2021-22 Game 36 - Philadelphia 3/2... | 8178 | 7162 | Philadelphia |
| 30 | 76 | Tue, Mar 29, 2022 | 10:00p | NaN | Utah Jazz | W | NaN | 121 | 115 | 37 | 39 | W 1 | NaN | 2022-03-29 | LA Clippers 2021-22 Game 37 - Utah 3/29/22 | 7853 | 6192 | Utah |
| 31 | 79 | Sun, Apr 3, 2022 | 9:30p | NaN | New Orleans Pelicans | W | NaN | 119 | 100 | 39 | 40 | W 2 | NaN | 2022-04-03 | LA Clippers 2021-22 Game 38 - New Orleans 4/3/22 | 7586 | 6164 | New Orleans |
| 32 | 80 | Wed, Apr 6, 2022 | 10:00p | NaN | Phoenix Suns | W | NaN | 113 | 109 | 40 | 40 | W 3 | NaN | 2022-04-06 | LA Clippers 2021-22 Game 39 - Phoenix 4/6/22 | 7850 | 6631 | Phoenix |
new_df_8games['win3'] = 'Y'
new_df_33games['win3'] = 'N'
home_games = new_df_8games.append(new_df_33games, ignore_index=True)
home_games = home_games.sort_values(by="event_date")
home_games
| G | Date | Start (ET) | homeaway | Opponent | W/L | OT | Tm | Opp | W | L | Streak | Notes | event_date | event_name | game_sales | game_attendance | City | win3 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 8 | 2 | Sat, Oct 23, 2021 | 10:30p | NaN | Memphis Grizzlies | L | NaN | 114 | 120 | 0 | 2 | L 2 | NaN | 2021-10-23 | LA Clippers 2021-22 Game 1 - Memphis 10/23/21 | 7934 | 6498 | Memphis | N |
| 9 | 3 | Mon, Oct 25, 2021 | 10:30p | NaN | Portland Trail Blazers | W | NaN | 116 | 86 | 1 | 2 | W 1 | NaN | 2021-10-25 | LA Clippers 2021-22 Game 2 - Portland 10/25/21 | 7417 | 5738 | Portland | N |
| 10 | 4 | Wed, Oct 27, 2021 | 10:30p | NaN | Cleveland Cavaliers | L | NaN | 79 | 92 | 1 | 3 | L 1 | NaN | 2021-10-27 | LA Clippers 2021-22 Game 3 - Cleveland 10/27/21 | 7044 | 4866 | Cleveland | N |
| 11 | 6 | Mon, Nov 1, 2021 | 10:30p | NaN | Oklahoma City Thunder | W | NaN | 99 | 94 | 2 | 4 | W 1 | NaN | 2021-11-01 | LA Clippers 2021-22 Game 4 - Oklahoma City 11/... | 7142 | 4784 | Oklahoma City | N |
| 0 | 9 | Sun, Nov 7, 2021 | 9:00p | NaN | Charlotte Hornets | W | NaN | 120 | 106 | 5 | 4 | W 4 | NaN | 2021-11-07 | LA Clippers 2021-22 Game 5 - Charlotte 11/7/21 | 7838 | 6303 | Charlotte | Y |
| 1 | 10 | Tue, Nov 9, 2021 | 10:00p | NaN | Portland Trail Blazers | W | NaN | 117 | 109 | 6 | 4 | W 5 | NaN | 2021-11-09 | LA Clippers 2021-22 Game 6 - Portland 11/9/21 | 7339 | 5531 | Portland | Y |
| 2 | 11 | Thu, Nov 11, 2021 | 10:30p | NaN | Miami Heat | W | NaN | 112 | 109 | 7 | 4 | W 6 | NaN | 2021-11-11 | LA Clippers 2021-22 Game 7 - Miami 11/11/21 | 7340 | 6200 | Miami | Y |
| 12 | 12 | Sat, Nov 13, 2021 | 10:30p | NaN | Minnesota Timberwolves | W | NaN | 129 | 102 | 8 | 4 | W 7 | NaN | 2021-11-13 | LA Clippers 2021-22 Game 8 - Minnesota 11/13/21 | 7731 | 6045 | Minnesota | N |
| 13 | 13 | Sun, Nov 14, 2021 | 9:30p | NaN | Chicago Bulls | L | NaN | 90 | 100 | 8 | 5 | L 1 | NaN | 2021-11-14 | LA Clippers 2021-22 Game 9 - Chicago 11/14/21 | 7888 | 6789 | Chicago | N |
| 14 | 14 | Tue, Nov 16, 2021 | 10:30p | NaN | San Antonio Spurs | W | NaN | 106 | 92 | 9 | 5 | W 1 | NaN | 2021-11-16 | LA Clippers 2021-22 Game 10 - San Antonio 11/1... | 7126 | 4928 | San Antonio | N |
| 15 | 17 | Sun, Nov 21, 2021 | 3:30p | NaN | Dallas Mavericks | W | NaN | 97 | 91 | 10 | 7 | W 1 | NaN | 2021-11-21 | LA Clippers 2021-22 Game 11 - Dallas 11/21/21 | 6832 | 5085 | Dallas | N |
| 16 | 18 | Tue, Nov 23, 2021 | 10:30p | NaN | Dallas Mavericks | L | OT | 104 | 112 | 10 | 8 | L 1 | NaN | 2021-11-23 | LA Clippers 2021-22 Game 12 - Dallas 11/23/21 | 7339 | 5903 | Dallas | N |
| 17 | 19 | Fri, Nov 26, 2021 | 3:30p | NaN | Detroit Pistons | W | NaN | 107 | 96 | 11 | 8 | W 1 | NaN | 2021-11-26 | LA Clippers 2021-22 Game 13 - Detroit 11/26/21 | 7477 | 5210 | Detroit | N |
| 18 | 20 | Sun, Nov 28, 2021 | 3:30p | NaN | Golden State Warriors | L | NaN | 90 | 105 | 11 | 9 | L 1 | NaN | 2021-11-28 | LA Clippers 2021-22 Game 14 - Golden State 11/... | 7622 | 6832 | Golden State | N |
| 19 | 21 | Mon, Nov 29, 2021 | 10:30p | NaN | New Orleans Pelicans | L | NaN | 104 | 123 | 11 | 10 | L 2 | NaN | 2021-11-29 | LA Clippers 2021-22 Game 15 - New Orleans11/29/21 | 6713 | 4257 | New Orleans | N |
| 20 | 22 | Wed, Dec 1, 2021 | 10:30p | NaN | Sacramento Kings | L | NaN | 115 | 124 | 11 | 11 | L 3 | NaN | 2021-12-01 | LA Clippers 2021-22 Game 16 - Sacramento 12/1/21 | 6747 | 4558 | Sacramento | N |
| 21 | 26 | Wed, Dec 8, 2021 | 10:30p | NaN | Boston Celtics | W | NaN | 114 | 111 | 14 | 12 | W 2 | NaN | 2021-12-08 | LA Clippers 2021-22 Game 17 - Boston 12/8/21 | 7456 | 6179 | Boston | N |
| 22 | 27 | Sat, Dec 11, 2021 | 3:30p | NaN | Orlando Magic | W | NaN | 106 | 104 | 15 | 12 | W 3 | NaN | 2021-12-11 | LA Clippers 2021-22 Game 18 - Orlando 12/11/21 | 7361 | 4814 | Orlando | N |
| 3 | 28 | Mon, Dec 13, 2021 | 10:30p | NaN | Phoenix Suns | W | NaN | 111 | 95 | 16 | 12 | W 4 | NaN | 2021-12-13 | LA Clippers 2021-22 Game 19 - Phoenix 12/13/21 | 7678 | 6190 | Phoenix | Y |
| 23 | 31 | Mon, Dec 20, 2021 | 10:30p | NaN | San Antonio Spurs | L | NaN | 92 | 116 | 16 | 15 | L 3 | NaN | 2021-12-20 | LA Clippers 2021-22 Game 20 - San Antonio 12/2... | 7835 | 5778 | San Antonio | N |
| 24 | 33 | Sun, Dec 26, 2021 | 9:00p | NaN | Denver Nuggets | L | NaN | 100 | 103 | 17 | 16 | L 1 | NaN | 2021-12-26 | LA Clippers 2021-22 Game 21 - Denver 12/26/21 | 7399 | 5566 | Denver | N |
| 25 | 34 | Mon, Dec 27, 2021 | 10:30p | NaN | Brooklyn Nets | L | NaN | 108 | 124 | 17 | 17 | L 2 | NaN | 2021-12-27 | LA Clippers 2021-22 Game 22 - Brooklyn 12/27/21 | 7497 | 5829 | Brooklyn | N |
| 26 | 38 | Mon, Jan 3, 2022 | 10:30p | NaN | Minnesota Timberwolves | L | NaN | 104 | 122 | 19 | 19 | L 1 | NaN | 2022-01-03 | LA Clippers 2021-22 Game 23 - Minnesota 1/3/22 | 7170 | 4587 | Minnesota | N |
| 27 | 40 | Sat, Jan 8, 2022 | 3:30p | NaN | Memphis Grizzlies | L | NaN | 108 | 123 | 19 | 21 | L 3 | NaN | 2022-01-08 | LA Clippers 2021-22 Game 24 - Memphis 1/8/22 | 7591 | 4850 | Memphis | N |
| 28 | 41 | Sun, Jan 9, 2022 | 3:30p | NaN | Atlanta Hawks | W | NaN | 106 | 93 | 20 | 21 | W 1 | NaN | 2022-01-09 | LA Clippers 2021-22 Game 25 - Atlanta 1/9/22 | 6830 | 4053 | Atlanta | N |
| 29 | 42 | Tue, Jan 11, 2022 | 10:30p | NaN | Denver Nuggets | W | NaN | 87 | 85 | 21 | 21 | W 2 | NaN | 2022-01-11 | LA Clippers 2021-22 Game 26 - Denver 1/11/22 | 6921 | 4369 | Denver | N |
| 30 | 45 | Mon, Jan 17, 2022 | 3:30p | NaN | Indiana Pacers | W | NaN | 139 | 133 | 22 | 23 | W 1 | NaN | 2022-01-17 | LA Clippers 2021-22 Game 27 - Indiana 1/17/22 | 6901 | 4212 | Indiana | N |
| 31 | 54 | Thu, Feb 3, 2022 | 10:00p | NaN | Los Angeles Lakers | W | NaN | 111 | 110 | 27 | 27 | W 1 | NaN | 2022-02-03 | LA Clippers 2021-22 Game 28 - Los Angeles 2/3/22 | 7610 | 6913 | Los Angeles | N |
| 32 | 55 | Sun, Feb 6, 2022 | 9:00p | NaN | Milwaukee Bucks | L | NaN | 113 | 137 | 27 | 28 | L 1 | NaN | 2022-02-06 | LA Clippers 2021-22 Game 29 - Milwaukee 2/6/22 | 7678 | 6403 | Milwaukee | N |
| 33 | 59 | Mon, Feb 14, 2022 | 10:30p | NaN | Golden State Warriors | W | NaN | 119 | 104 | 29 | 30 | W 2 | NaN | 2022-02-14 | LA Clippers 2021-22 Game 30 - Golden State 2/1... | 8033 | 7311 | Golden State | N |
| 34 | 61 | Thu, Feb 17, 2022 | 10:30p | NaN | Houston Rockets | W | NaN | 142 | 111 | 30 | 31 | W 1 | NaN | 2022-02-17 | LA Clippers 2021-22 Game 31 - Houston 2/17/22 | 8006 | 5849 | Houston | N |
| 4 | 65 | Thu, Mar 3, 2022 | 10:00p | NaN | Los Angeles Lakers | W | NaN | 132 | 111 | 34 | 31 | W 5 | NaN | 2022-03-03 | LA Clippers 2021-22 Game 32 - Los Angeles 3/3/22 | 8073 | 7376 | Los Angeles | Y |
| 5 | 66 | Sun, Mar 6, 2022 | 10:00p | NaN | New York Knicks | L | NaN | 93 | 116 | 34 | 32 | L 1 | NaN | 2022-03-06 | LA Clippers 2021-22 Game 33 - New York 3/6/22 | 7925 | 6198 | New York | Y |
| 35 | 68 | Wed, Mar 9, 2022 | 10:30p | NaN | Washington Wizards | W | NaN | 115 | 109 | 35 | 33 | W 1 | NaN | 2022-03-09 | LA Clippers 2021-22 Game 34 - Washington 3/9/22 | 7734 | 5629 | Washington | N |
| 36 | 72 | Wed, Mar 16, 2022 | 10:30p | NaN | Toronto Raptors | L | NaN | 100 | 103 | 36 | 36 | L 2 | NaN | 2022-03-16 | LA Clippers 2021-22 Game 35 - Toronto 3/16/22 | 8189 | 6774 | Toronto | N |
| 37 | 75 | Fri, Mar 25, 2022 | 10:30p | NaN | Philadelphia 76ers | L | NaN | 97 | 122 | 36 | 39 | L 5 | NaN | 2022-03-25 | LA Clippers 2021-22 Game 36 - Philadelphia 3/2... | 8178 | 7162 | Philadelphia | N |
| 38 | 76 | Tue, Mar 29, 2022 | 10:00p | NaN | Utah Jazz | W | NaN | 121 | 115 | 37 | 39 | W 1 | NaN | 2022-03-29 | LA Clippers 2021-22 Game 37 - Utah 3/29/22 | 7853 | 6192 | Utah | N |
| 39 | 79 | Sun, Apr 3, 2022 | 9:30p | NaN | New Orleans Pelicans | W | NaN | 119 | 100 | 39 | 40 | W 2 | NaN | 2022-04-03 | LA Clippers 2021-22 Game 38 - New Orleans 4/3/22 | 7586 | 6164 | New Orleans | N |
| 40 | 80 | Wed, Apr 6, 2022 | 10:00p | NaN | Phoenix Suns | W | NaN | 113 | 109 | 40 | 40 | W 3 | NaN | 2022-04-06 | LA Clippers 2021-22 Game 39 - Phoenix 4/6/22 | 7850 | 6631 | Phoenix | N |
| 6 | 81 | Sat, Apr 9, 2022 | 9:30p | NaN | Sacramento Kings | W | NaN | 117 | 98 | 41 | 40 | W 4 | NaN | 2022-04-09 | LA Clippers 2021-22 Game 40 - Sacramento 4/9/22 | 7685 | 5942 | Sacramento | Y |
| 7 | 82 | Sun, Apr 10, 2022 | 9:30p | NaN | Oklahoma City Thunder | W | NaN | 138 | 88 | 42 | 40 | W 5 | NaN | 2022-04-10 | LA Clippers 2021-22 Game 41 - Oklahoma City 4/... | 7534 | 5972 | Oklahoma City | Y |
#Make a plot to show the difference in quantity of attendance.
import plotly.express as px
import plotly.offline as pyo
pyo.init_notebook_mode()
fig1 = px.bar(data_frame = home_games, x = 'event_date', y = 'game_attendance', color = 'win3', barmode = 'group',
template = 'plotly_white', labels={
'event_date': "The exact date of the game",
'game_attendance': "Number of attendance",
},
title="Attendance in 8 home games after 3 win streak vs. other home games")
fig1.show()
#Make a plot to show the difference in quantity of ticket purchase.
import plotly.express as px
fig2 = px.bar(data_frame = home_games, x = 'event_date', y = 'game_sales', color = 'win3', barmode = 'group',
template = 'plotly_white', labels={
'event_date': "The exact date of the game",
'game_sales': "Sales per game",
},
title="Sales per game in 8 home games after 3 win streak vs. other home games")
fig2.show()
#Summary statistics between after W3 streak home games vs. other home games.
print(home_games.groupby('win3').describe())
game_sales \
count mean std min 25% 50% 75%
win3
N 33.0 7475.454545 424.285067 6713.0 7142.0 7497.0 7835.00
Y 8.0 7676.500000 265.266981 7339.0 7485.5 7681.5 7859.75
game_attendance \
max count mean std min 25% 50%
win3
N 8189.0 33.0 5659.333333 939.821614 4053.0 4850.0 5778.0
Y 8073.0 8.0 6214.000000 528.911281 5531.0 5964.5 6194.0
75% max
win3
N 6403.00 7311.0
Y 6225.75 7376.0
sales_att_by_game.head()
| event_name | event_date | game_sales | game_attendance | City | |
|---|---|---|---|---|---|
| 0 | LA Clippers 2021-22 Game 1 - Memphis 10/23/21 | 2021-10-23 | 7934 | 6498 | Memphis |
| 11 | LA Clippers 2021-22 Game 2 - Portland 10/25/21 | 2021-10-25 | 7417 | 5738 | Portland |
| 22 | LA Clippers 2021-22 Game 3 - Cleveland 10/27/21 | 2021-10-27 | 7044 | 4866 | Cleveland |
| 33 | LA Clippers 2021-22 Game 4 - Oklahoma City 11/... | 2021-11-01 | 7142 | 4784 | Oklahoma City |
| 36 | LA Clippers 2021-22 Game 5 - Charlotte 11/7/21 | 2021-11-07 | 7838 | 6303 | Charlotte |
#Select the averae attendance by oponent city
opp = """
SELECT City AS 'opponent_city', AVG(game_attendance) AS 'avg_att_by_opponent'
FROM sales_att_by_game
GROUP BY City;
"""
average_att_by_opponent = pysqldf(opp)
#Select the top 3 opponent with highest attendance:
top3 = average_att_by_opponent.nlargest(3,['avg_att_by_opponent'])
top3
| opponent_city | avg_att_by_opponent | |
|---|---|---|
| 21 | Philadelphia | 7162.0 |
| 12 | Los Angeles | 7144.5 |
| 9 | Golden State | 7071.5 |
#select the other opponent
other = """
SELECT AVG(avg_att_by_opponent) AS 'avg_att_by_opponent'
FROM average_att_by_opponent
WHERE opponent_city != 'Philadelphia'
AND opponent_city != 'Los Angeles'
AND opponent_city != 'Golden State';
"""
pysqldf(other)
| avg_att_by_opponent | |
|---|---|
| 0 | 5622.615385 |
all = top3.append({'opponent_city': 'others', 'avg_att_by_opponent': 5622.6}, ignore_index=True)
all
| opponent_city | avg_att_by_opponent | |
|---|---|---|
| 0 | Philadelphia | 7162.0 |
| 1 | Los Angeles | 7144.5 |
| 2 | Golden State | 7071.5 |
| 3 | others | 5622.6 |
#px.bar(data_frame = all, x = 'opponent_city', y = 'avg_att_by_opponent', barmode = 'group',template = 'plotly_white')
ax = all.plot.bar(x = 'opponent_city', y = 'avg_att_by_opponent', rot=0)
sales_att_by_game.head()
| event_name | event_date | game_sales | game_attendance | City | |
|---|---|---|---|---|---|
| 0 | LA Clippers 2021-22 Game 1 - Memphis 10/23/21 | 2021-10-23 | 7934 | 6498 | Memphis |
| 11 | LA Clippers 2021-22 Game 2 - Portland 10/25/21 | 2021-10-25 | 7417 | 5738 | Portland |
| 22 | LA Clippers 2021-22 Game 3 - Cleveland 10/27/21 | 2021-10-27 | 7044 | 4866 | Cleveland |
| 33 | LA Clippers 2021-22 Game 4 - Oklahoma City 11/... | 2021-11-01 | 7142 | 4784 | Oklahoma City |
| 36 | LA Clippers 2021-22 Game 5 - Charlotte 11/7/21 | 2021-11-07 | 7838 | 6303 | Charlotte |
avg = """
SELECT AVG(game_attendance) AS average_att_per_game
FROM sales_att_by_game
"""
pysqldf(avg)
| average_att_per_game | |
|---|---|
| 0 | 5767.560976 |
average_att_by_opponent.head()
| opponent_city | avg_att_by_opponent | |
|---|---|---|
| 0 | Atlanta | 4053.0 |
| 1 | Boston | 6179.0 |
| 2 | Brooklyn | 5829.0 |
| 3 | Charlotte | 6303.0 |
| 4 | Chicago | 6789.0 |
team2
#We select the variables that could be associated with the ticket sales and attendances. It is obvious those factors can also determine whether the team is good or not.
team1.rename(columns={"Team Full Name": "Opponent"}, inplace=True)
team3 = team1[["Opponent", "Conference", "Playoffs in 2020-2021", "Playoffs in 2021-2022", "Championships", "Vegas Over/Under 21/22", "Vegas Over/Under 22/23"]]
home_games.head()
#We merge the home games data with those variables of the team into a whole dataset
home_games1 = home_games.merge(team3, on='Opponent')
home_games1.columns
Index(['G', 'Date', 'Start (ET)', 'homeaway', 'Opponent', 'W/L', 'OT', 'Tm',
'Opp', 'W', 'L', 'Streak', 'Notes', 'event_date', 'event_name',
'game_sales', 'game_attendance', 'City', 'win3', 'Conference',
'Playoffs in 2020-2021', 'Playoffs in 2021-2022', 'Championships',
'Vegas Over/Under 21/22', 'Vegas Over/Under 22/23'],
dtype='object')
We decided to use the variables that would be related to the home game attendance:
#Convert yes or no to 1 or 0 , for convenience
home_games1["Conference"].replace(['Western', 'Eastern'],
[1, 0], inplace=True)
home_games1["Playoffs in 2020-2021"].replace(['Yes', 'No'],
[1, 0], inplace=True)
home_games1["Playoffs in 2021-2022"].replace(['Yes', 'No'],
[1, 0], inplace=True)
#Get dummy varibles from oponent variables:
dum = pd.get_dummies(home_games1['Opponent'])
dum.columns
#Select only the top three & insert columns to home_games1 dataframe
three = dum[['Golden State Warriors', 'Philadelphia 76ers', 'Los Angeles Lakers']]
home_games1['Golden State Warriors'] = three['Golden State Warriors']
home_games1['Philadelphia 76ers'] = three['Philadelphia 76ers']
home_games1['Los Angeles Lakers'] = three['Los Angeles Lakers']
#Check the unique value in streak variable
home_games1['Streak'].unique()
home_games1["Streak"].replace(['L 2', 'L 3', 'W 1', 'W 5', 'L 1', 'W 4', 'W 6', 'W 7', 'W 2',
'W 3', 'L 5'],
[-2, -3, 1, 5, -1, 4, 6, 7, 2, 3, -5], inplace=True)
#Add weekday/weekend column
#home_games1['Date'].reset_index(drop=True).str.split(",", expand = True).head()
home_games1['weekday'] = home_games1['Date'].reset_index(drop=True).str.split(",", expand = True).drop(columns = 2)[0]
def get_days(a):
if a in ['Sat','Sun']:
return 1
else:
return 0
#Convert to weekend:1 weekday:0
home_games1['weekend'] = home_games1['weekday'].apply(get_days)
#home_games1["weekday"] = home_games1["weekday"].replace(['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat','Sun'],
#[1, 2, 3, 4, 5, 6, 7], inplace=True)
#home_games1["weekday"] = home_games1["weekday"].astype(object)
home_games1.head()
| G | Date | Start (ET) | homeaway | Opponent | W/L | OT | Tm | Opp | W | ... | Playoffs in 2020-2021 | Playoffs in 2021-2022 | Championships | Vegas Over/Under 21/22 | Vegas Over/Under 22/23 | Golden State Warriors | Philadelphia 76ers | Los Angeles Lakers | weekday | weekend | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2 | Sat, Oct 23, 2021 | 10:30p | NaN | Memphis Grizzlies | L | NaN | 114 | 120 | 0 | ... | 1 | 1 | 0 | 41.5 | 48.5 | 0 | 0 | 0 | Sat | 1 |
| 1 | 40 | Sat, Jan 8, 2022 | 3:30p | NaN | Memphis Grizzlies | L | NaN | 108 | 123 | 19 | ... | 1 | 1 | 0 | 41.5 | 48.5 | 0 | 0 | 0 | Sat | 1 |
| 2 | 3 | Mon, Oct 25, 2021 | 10:30p | NaN | Portland Trail Blazers | W | NaN | 116 | 86 | 1 | ... | 1 | 0 | 1 | 44.5 | 39.5 | 0 | 0 | 0 | Mon | 0 |
| 3 | 10 | Tue, Nov 9, 2021 | 10:00p | NaN | Portland Trail Blazers | W | NaN | 117 | 109 | 6 | ... | 1 | 0 | 1 | 44.5 | 39.5 | 0 | 0 | 0 | Tue | 0 |
| 4 | 4 | Wed, Oct 27, 2021 | 10:30p | NaN | Cleveland Cavaliers | L | NaN | 79 | 92 | 1 | ... | 0 | 0 | 1 | 26.5 | 47.5 | 0 | 0 | 0 | Wed | 0 |
5 rows × 30 columns
#Add Event Month column:
g = """
SELECT *, strftime('%m',event_date) AS month
FROM home_games1
"""
home_games1 = pysqldf(g)
home_games1
#Convert to factor:
home_games1["month"] = home_games1["month"].astype(object)
#We decide to select the variables that are related to the attendance of the game.
#The reason for not selecting "Vegas Over/Under 22/23" is because we are now predicting 2021-2022 season attendance
home_games2122 = home_games1[[ "Conference","Playoffs in 2020-2021", "Championships", "game_sales", "Vegas Over/Under 21/22" , "Streak", "game_attendance",
'Golden State Warriors', 'Philadelphia 76ers', 'Los Angeles Lakers','weekend','month']]
#home_games2122.dtypes
home_games2122[home_games2122.columns[0:]].corr()['game_attendance'].sort_values()
Conference -0.006147 weekend 0.057169 Streak 0.117491 Playoffs in 2020-2021 0.185885 Philadelphia 76ers 0.248776 Golden State Warriors 0.333180 Los Angeles Lakers 0.351833 Vegas Over/Under 21/22 0.400282 Championships 0.440146 game_sales 0.858419 game_attendance 1.000000 Name: game_attendance, dtype: float64
home_games2122[home_games2122.columns[0:]].corr()['game_sales'].sort_values()
Conference -0.078228 Streak 0.024697 Playoffs in 2020-2021 0.081492 weekend 0.090679 Vegas Over/Under 21/22 0.132727 Golden State Warriors 0.177730 Los Angeles Lakers 0.185684 Championships 0.215935 Philadelphia 76ers 0.263135 game_attendance 0.858419 game_sales 1.000000 Name: game_sales, dtype: float64
import seaborn as sns
#correlation graph
sns.heatmap(home_games2122.corr(), annot=True , cmap="Blues").set(title='Heatmap of Home-game factors - Pearson Correlations');
home_games2122.head()
| Conference | Playoffs in 2020-2021 | Championships | game_sales | Vegas Over/Under 21/22 | Streak | game_attendance | Golden State Warriors | Philadelphia 76ers | Los Angeles Lakers | weekend | month | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 1 | 0 | 7934 | 41.5 | -2 | 6498 | 0 | 0 | 0 | 1 | 10 |
| 1 | 1 | 1 | 0 | 7591 | 41.5 | -3 | 4850 | 0 | 0 | 0 | 1 | 01 |
| 2 | 1 | 1 | 1 | 7417 | 44.5 | 1 | 5738 | 0 | 0 | 0 | 0 | 10 |
| 3 | 1 | 1 | 1 | 7339 | 44.5 | 5 | 5531 | 0 | 0 | 0 | 0 | 11 |
| 4 | 0 | 0 | 1 | 7044 | 26.5 | -1 | 4866 | 0 | 0 | 0 | 0 | 10 |
#Predict 21-22 regular season game attendance
#split the train and validation data
from sklearn.model_selection import train_test_split
X = home_games2122.drop(home_games2122.columns[[6]],axis = 1)
y = home_games2122['game_attendance']
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
#Build the linear regression model
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
reg = LinearRegression().fit(X_train, y_train)
y_pred = reg.predict(X_val)
reg.score(X_train,y_train) #Return the coefficient of determination of the prediction R^2.
0.9280606546854155
#See how good is the result
predscompare = pd.DataFrame({'Actual': y_val, 'Predicted': y_pred})
print(predscompare)
from sklearn import metrics
print({'mean_absolute error': metrics.mean_absolute_error(y_val, y_pred), 'root mean squared error': np.sqrt(metrics.mean_squared_error(y_val, y_pred))})
Actual Predicted
24 4814 5106.661901
13 5778 6275.125813
8 6200 5732.591025
25 6190 6500.953298
4 4866 4468.945264
40 6192 6420.870422
19 4257 4397.928053
39 7162 6809.805055
29 5829 6101.028571
{'mean_absolute error': 328.8029682725553, 'root mean squared error': 345.7494015487711}
importance = reg.coef_
intercept = reg.intercept_
coefs = pd.DataFrame(
reg.coef_,
columns=['Coefficients'], index=X_train.columns
)
coefs.plot(kind='barh', figsize=(9, 7))
plt.title('Linear model')
plt.axvline(x=0, color='.5')
plt.subplots_adjust(left=.3)
for i,v in enumerate(importance):
print('Feature: %0d, Coefficient: %.5f' % (i,v))
Feature: 0, Coefficient: 118.59845 Feature: 1, Coefficient: -231.51631 Feature: 2, Coefficient: 26.95415 Feature: 3, Coefficient: 1.85317 Feature: 4, Coefficient: 32.81536 Feature: 5, Coefficient: 18.32986 Feature: 6, Coefficient: 244.36717 Feature: 7, Coefficient: 0.00000 Feature: 8, Coefficient: 271.29478 Feature: 9, Coefficient: 50.22130 Feature: 10, Coefficient: 42.46817
#Print the summary of the model result:
import statsmodels.api as sm
X = sm.add_constant(X)
stat = sm.OLS(y, X).fit()
stat.summary()
| Dep. Variable: | game_attendance | R-squared: | 0.891 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.855 |
| Method: | Least Squares | F-statistic: | 24.56 |
| Date: | Sat, 01 Oct 2022 | Prob (F-statistic): | 9.00e-12 |
| Time: | 22:16:03 | Log-Likelihood: | -290.98 |
| No. Observations: | 41 | AIC: | 604.0 |
| Df Residuals: | 30 | BIC: | 622.8 |
| Df Model: | 10 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | -8084.8171 | 1172.236 | -6.897 | 0.000 | -1.05e+04 | -5690.793 |
| Conference | 66.4609 | 124.811 | 0.532 | 0.598 | -188.437 | 321.359 |
| Playoffs in 2020-2021 | -188.8087 | 177.977 | -1.061 | 0.297 | -552.287 | 174.670 |
| Championships | 41.4839 | 20.419 | 2.032 | 0.051 | -0.218 | 83.185 |
| Vegas Over/Under 21/22 | 29.1550 | 9.439 | 3.089 | 0.004 | 9.877 | 48.433 |
| Streak | 35.9768 | 21.602 | 1.665 | 0.106 | -8.140 | 80.093 |
| game_sales | 1.6660 | 0.149 | 11.196 | 0.000 | 1.362 | 1.970 |
| Golden State Warriors | 294.1041 | 335.546 | 0.876 | 0.388 | -391.172 | 979.380 |
| Philadelphia 76ers | 394.1528 | 399.817 | 0.986 | 0.332 | -422.382 | 1210.688 |
| Los Angeles Lakers | -56.0966 | 425.075 | -0.132 | 0.896 | -924.216 | 812.023 |
| weekend | 65.4311 | 118.071 | 0.554 | 0.584 | -175.703 | 306.565 |
| Omnibus: | 1.992 | Durbin-Watson: | 2.081 |
|---|---|---|---|
| Prob(Omnibus): | 0.369 | Jarque-Bera (JB): | 1.531 |
| Skew: | -0.473 | Prob(JB): | 0.465 |
| Kurtosis: | 2.955 | Cond. No. | 1.68e+05 |
#game_table.join(predscompare)
#univariate regression graph for each features and game attendance variable
variables = ["Conference", "Playoffs in 2020-2021", "Championships", "Vegas Over/Under 21/22" , "Streak", "game_sales",
'Golden State Warriors', 'Philadelphia 76ers', 'Los Angeles Lakers']
for var in variables:
plt.figure() # Creating a rectangle (figure) for each plot
# Regression Plot also by default includes
# best-fitting regression line
# which can be turned off via `fit_reg=False`
sns.regplot(x=var, y='game_attendance', data= home_games2122).set(title=f'Regression plot of {var} and Game Attendance');
#Fit model to predict 22-23 home games attendance, replace the X variables with 22/23's data
home_games2223 = home_games1[["Conference", "Playoffs in 2021-2022", "Championships", "Vegas Over/Under 22/23" , "Streak", "game_sales", "game_attendance",
'Golden State Warriors', 'Philadelphia 76ers', 'Los Angeles Lakers','weekend','month']]
x_val = home_games2223.drop(home_games2122.columns[[6]],axis = 1)
y_ = reg.predict(x_val)
y_
array([6732.22472808, 5696.04371949, 5742.03588642, 5713.27617977,
5158.06789553, 4749.83626484, 5302.54277123, 6425.90116534,
5732.59102541, 6563.46797473, 4902.29722131, 6554.01119425,
4795.18678523, 6078.23363238, 4814.44238102, 5667.11897009,
5502.85052857, 6746.32580611, 7130.53375133, 4330.48855903,
5774.57023618, 4315.13614639, 5892.19533981, 6458.14254558,
5237.92335451, 6533.76866179, 6494.43880466, 5876.85990197,
4528.66263174, 5904.13639066, 4169.00977086, 3813.8502244 ,
6659.40591052, 7633.21162463, 6002.96878507, 5995.7164677 ,
6308.08674027, 5782.35392574, 6667.19439307, 6809.80505513,
5502.04024589])
home_games1.columns
Index(['G', 'Date', 'Start (ET)', 'homeaway', 'Opponent', 'W/L', 'OT', 'Tm',
'Opp', 'W', 'L', 'Streak', 'Notes', 'event_date', 'event_name',
'game_sales', 'game_attendance', 'City', 'win3', 'Conference',
'Playoffs in 2020-2021', 'Playoffs in 2021-2022', 'Championships',
'Vegas Over/Under 21/22', 'Vegas Over/Under 22/23',
'Golden State Warriors', 'Philadelphia 76ers', 'Los Angeles Lakers',
'weekday', 'weekend', 'month'],
dtype='object')
predict_att = pd.DataFrame({'Predicted': y_})
predict_2223 = home_games1[['event_name']].join(predict_att)
predict_2223
| event_name | Predicted | |
|---|---|---|
| 0 | LA Clippers 2021-22 Game 1 - Memphis 10/23/21 | 6732.224728 |
| 1 | LA Clippers 2021-22 Game 24 - Memphis 1/8/22 | 5696.043719 |
| 2 | LA Clippers 2021-22 Game 2 - Portland 10/25/21 | 5742.035886 |
| 3 | LA Clippers 2021-22 Game 6 - Portland 11/9/21 | 5713.276180 |
| 4 | LA Clippers 2021-22 Game 3 - Cleveland 10/27/21 | 5158.067896 |
| 5 | LA Clippers 2021-22 Game 4 - Oklahoma City 11/... | 4749.836265 |
| 6 | LA Clippers 2021-22 Game 41 - Oklahoma City 4/... | 5302.542771 |
| 7 | LA Clippers 2021-22 Game 5 - Charlotte 11/7/21 | 6425.901165 |
| 8 | LA Clippers 2021-22 Game 7 - Miami 11/11/21 | 5732.591025 |
| 9 | LA Clippers 2021-22 Game 8 - Minnesota 11/13/21 | 6563.467975 |
| 10 | LA Clippers 2021-22 Game 23 - Minnesota 1/3/22 | 4902.297221 |
| 11 | LA Clippers 2021-22 Game 9 - Chicago 11/14/21 | 6554.011194 |
| 12 | LA Clippers 2021-22 Game 10 - San Antonio 11/1... | 4795.186785 |
| 13 | LA Clippers 2021-22 Game 20 - San Antonio 12/2... | 6078.233632 |
| 14 | LA Clippers 2021-22 Game 11 - Dallas 11/21/21 | 4814.442381 |
| 15 | LA Clippers 2021-22 Game 12 - Dallas 11/23/21 | 5667.118970 |
| 16 | LA Clippers 2021-22 Game 13 - Detroit 11/26/21 | 5502.850529 |
| 17 | LA Clippers 2021-22 Game 14 - Golden State 11/... | 6746.325806 |
| 18 | LA Clippers 2021-22 Game 30 - Golden State 2/1... | 7130.533751 |
| 19 | LA Clippers 2021-22 Game 15 - New Orleans11/29/21 | 4330.488559 |
| 20 | LA Clippers 2021-22 Game 38 - New Orleans 4/3/22 | 5774.570236 |
| 21 | LA Clippers 2021-22 Game 16 - Sacramento 12/1/21 | 4315.136146 |
| 22 | LA Clippers 2021-22 Game 40 - Sacramento 4/9/22 | 5892.195340 |
| 23 | LA Clippers 2021-22 Game 17 - Boston 12/8/21 | 6458.142546 |
| 24 | LA Clippers 2021-22 Game 18 - Orlando 12/11/21 | 5237.923355 |
| 25 | LA Clippers 2021-22 Game 19 - Phoenix 12/13/21 | 6533.768662 |
| 26 | LA Clippers 2021-22 Game 39 - Phoenix 4/6/22 | 6494.438805 |
| 27 | LA Clippers 2021-22 Game 21 - Denver 12/26/21 | 5876.859902 |
| 28 | LA Clippers 2021-22 Game 26 - Denver 1/11/22 | 4528.662632 |
| 29 | LA Clippers 2021-22 Game 22 - Brooklyn 12/27/21 | 5904.136391 |
| 30 | LA Clippers 2021-22 Game 25 - Atlanta 1/9/22 | 4169.009771 |
| 31 | LA Clippers 2021-22 Game 27 - Indiana 1/17/22 | 3813.850224 |
| 32 | LA Clippers 2021-22 Game 28 - Los Angeles 2/3/22 | 6659.405911 |
| 33 | LA Clippers 2021-22 Game 32 - Los Angeles 3/3/22 | 7633.211625 |
| 34 | LA Clippers 2021-22 Game 29 - Milwaukee 2/6/22 | 6002.968785 |
| 35 | LA Clippers 2021-22 Game 31 - Houston 2/17/22 | 5995.716468 |
| 36 | LA Clippers 2021-22 Game 33 - New York 3/6/22 | 6308.086740 |
| 37 | LA Clippers 2021-22 Game 34 - Washington 3/9/22 | 5782.353926 |
| 38 | LA Clippers 2021-22 Game 35 - Toronto 3/16/22 | 6667.194393 |
| 39 | LA Clippers 2021-22 Game 36 - Philadelphia 3/2... | 6809.805055 |
| 40 | LA Clippers 2021-22 Game 37 - Utah 3/29/22 | 5502.040246 |